import glob
import os
import re
from collections import Counter

# Root directory that is searched recursively for files named "POTCAR".
dirpath = '/mnt/f/EndMemberCollections/16symbols-cxy-hmd'
# Outputs recorded from earlier runs, kept for reference. They were produced
# with an older TITEL pattern that consumed one character after "= ", so the
# titles appear as 'AW_PBE ...' (presumably 'PAW_PBE ...' in the files). The
# per-date counts do not depend on that prefix.
#
# {'AW_PBE V 08Apr2002', 'AW_PBE Ta 17Jan2003', 'AW_PBE Cu 05Jan2001', 'AW_PBE Mn 06Sep2000', 'AW_PBE Ni 06Sep2000', 
# 'AW_PBE Cr 06Sep2000', 'AW_PBE Co 06Sep2000', 'AW_PBE Ti 08Apr2002', 'AW_PBE Fe 06Sep2000', 'AW_PBE Re 17Jan2003', 
# 'AW_PBE W 08Apr2002', 'AW_PBE Mo 08Apr2002', 'AW_PBE Al 04Jan2001'}
# {'04Jan2001': 348, '06Sep2000': 1590, '05Jan2001': 276, '08Apr2002': 1748, '17Jan2003': 713}
# dirpath = '/mnt/f/EndMemberCollections/Hf-Nb-Zr-byCXY/Hf-Nb-Zr-CONTAIN-END-MEMBER'
# {'04Jan2001': 252, '06Sep2000': 2135, '05Jan2001': 236, '20Jan2003': 1665, '08Apr2002': 3996, '17Jan2003': 578}
# {'AW_PBE Nb_pv 08Apr2002', 'AW_PBE Re 17Jan2003', 'AW_PBE Fe 06Sep2000', 'AW_PBE Mn 06Sep2000', 'AW_PBE Cr 06Sep2000', 
#  'AW_PBE Hf 20Jan2003', 'AW_PBE Mo 08Apr2002', 'AW_PBE Al 04Jan2001', 'AW_PBE Cu 05Jan2001', 'AW_PBE W 08Apr2002', 
#  'AW_PBE Ta 17Jan2003', 'AW_PBE Zr 08Apr2002', 'AW_PBE Co 06Sep2000', 'AW_PBE Ti 08Apr2002', 'AW_PBE V 08Apr2002', 'AW_PBE Ni 06Sep2000'}


# Captures everything after "TITEL =" up to the end of the line. Only spaces
# and tabs are allowed around "=" so the match can never spill onto the next
# line, and no character of the value itself is consumed.
TITEL_PATTERN = re.compile(r'TITEL[ \t]*=[ \t]*([^\r\n]+)')


def extract_titles(text):
    """Return the stripped value of every ``TITEL = ...`` line found in *text*.

    Args:
        text: Full contents of a POTCAR file (may hold several TITEL lines).

    Returns:
        A list of title strings in order of appearance; empty if none match.
    """
    return [value.strip() for value in TITEL_PATTERN.findall(text)]


def count_release_dates(titles):
    """Count how often each third field occurs among three-field titles.

    A title is expected to consist of exactly three whitespace-separated
    fields; the third one is used as the counting key.

    Args:
        titles: Iterable of title strings.

    Returns:
        A ``(counts, malformed)`` tuple: ``counts`` is a plain dict mapping the
        third field to its number of occurrences (keys in first-seen order),
        and ``malformed`` is the list of titles that did not split into
        exactly three fields.
    """
    counts = Counter()
    malformed = []
    for title in titles:
        # split() without an argument tolerates repeated spaces and tabs.
        fields = title.split()
        if len(fields) != 3:
            malformed.append(title)
            continue
        counts[fields[2]] += 1
    return dict(counts), malformed


potcar_files = glob.glob(os.path.join(dirpath, '**', 'POTCAR'), recursive=True)
print(len(potcar_files))

vers = []
for potcar in potcar_files:
    # errors='replace' keeps a single file with a stray non-UTF-8 byte from
    # aborting the whole scan; the TITEL line itself is plain text.
    with open(potcar, 'r', encoding='utf-8', errors='replace') as f:
        vers.extend(extract_titles(f.read()))
vers.sort()

print(set(vers))

# Frequency count of the titles, keyed by their third field.
ver_dict, malformed_titles = count_release_dates(vers)
# Report titles that could not be split into three fields, then the counts.
for bad_title in malformed_titles:
    print(bad_title)
print(ver_dict)